library(specr)
library(lme4)
library(readr)
library(broom.mixed)
library(texreg)
library(dotwhisker)
library(dplyr)
library(fixest)

stars <- c(0.001, 0.01, 0.05, 0.1)

# Mode function
source("C:/Users/Frederik Gremler/Documents/paper_content_lea/analysis/helper_functions_eo2_extraction_analysis.R")

# load in data
epr_yearly_group_clusters_h1 <- read_csv("data/epro_data_h1.csv")
epro_year_group_clusters <- read_csv("data/epro_data_h2.csv")
epro_year_group_clusters_geo <- read_csv("data/epro_geo_data_h3.csv")

mean_h1 <- epr_yearly_group_clusters_h1 %>% 
  filter(year >= 2000) %>%# only obs after 2000 because of EPR 2.0 coding
  group_by(gwgroupid, countries_gwid) %>%
  summarize(
    across(c(max_prop_agree_any, agreement, n_orgs, resource_and_agriculture, regaut, groupsize, incidence_flag, status_pwrrank, any_multiethnic),
           ~ mean(.x, na.rm = T)),
    none_one_or_both_nats_agri_char = Mode(as.factor(none_one_or_both_nats_agri_char)),
    agreement_min = min(agreement, na.rm = T)
    )

mean_h2 <- epro_year_group_clusters %>% 
  filter(year >= 2000) %>%# only obs after 2000 because of EPR 2.0 coding
  group_by(gwgroupid, countries_gwid) %>%
  summarize(
    across(c(max_prop_agree_any, agreement, n_orgs, n_ed_religions, hhi_rel, religious_segments_bin, regaut, groupsize, incidence_flag, status_pwrrank, any_multiethnic),
           ~ mean(.x, na.rm = T)))

mean_h3 <- epro_year_group_clusters_geo %>% 
  filter(year >= 2000) %>%# only obs after 2000 because of EPR 2.0 coding
  group_by(gwgroupid, countries_gwid) %>%
  summarize(
    across(c(max_prop_agree_any, n_non_intersection_group_polygons, multiple_polygons_bin, spatial_hhi, agreement, n_orgs, regaut, groupsize, incidence_flag, status_pwrrank, any_multiethnic),
           ~ mean(.x, na.rm = T)))




#######
## H1
######


full_model_h1 <- lm(agreement ~ resource_and_agriculture + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                       data = mean_h1)

full_model_h1_max <- lm(max_prop_agree_any ~ resource_and_agriculture + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                           data = mean_h1)

full_model_h1_n_resources <- lm(agreement ~ none_one_or_both_nats_agri_char + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                                   data = mean_h1)

full_model_h1_n_resources_max <- lm(max_prop_agree_any ~ none_one_or_both_nats_agri_char + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                                       data = mean_h1)


screenreg(list(full_model_h1, full_model_h1_max, full_model_h1_n_resources, full_model_h1_n_resources_max), stars = stars)

########
### H2
########

full_model_h2 <- lm(agreement ~ religious_segments_bin + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                    data = mean_h2)

# n rels as IV
full_model_h2_n_rels <- lm(agreement ~ n_ed_religions + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                           data = mean_h2)


full_model_h2_herf <- lm(agreement ~ hhi_rel + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                         data = mean_h2)


# full_model_h2_no1clusters <- glmer(agreement ~ religious_segments_bin + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic
#                        +(1+ avg_nightlight_total_pc_log + n_tek_groups |gwgroupid) + (1+ avg_nightlight_total_pc_log + n_tek_groups |countries_gwid), data = epro_year_group_clusters %>% filter(n_orgs > 1), family = "binomial")

full_model_h2_max <- lm(max_prop_agree_any ~ religious_segments_bin + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                        data = mean_h2)

full_model_h2_max_n_rels <- lm(max_prop_agree_any ~ n_ed_religions + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                                  data = mean_h2)

full_model_h2_max_herf <- lm(max_prop_agree_any ~ hhi_rel + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                                data = mean_h2)

screenreg(list(full_model_h2, full_model_h2_n_rels, full_model_h2_herf, full_model_h2_max, full_model_h2_max_n_rels, full_model_h2_max_herf),
          stars = stars)

###########
### H3
##########

full_model_h3 <- lm(agreement ~ n_non_intersection_group_polygons + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                       data = mean_h3)

full_model_h3_max <- lm(max_prop_agree_any ~ n_non_intersection_group_polygons + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                          data = mean_h3)

full_model_h3_bin <- lm(agreement ~ multiple_polygons_bin + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                        data = mean_h3)

full_model_h3_max_bin <- lm(max_prop_agree_any ~  multiple_polygons_bin + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                               data = mean_h3)

full_model_h3_hhi <- lm(agreement ~ spatial_hhi + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                        data = mean_h3)

full_model_h3_max_hhi <- lm(max_prop_agree_any ~ spatial_hhi + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                            data = mean_h3)

full_model_h3_prop_no_outliers <- lm(agreement ~ n_non_intersection_group_polygons + n_orgs + regaut + groupsize + incidence_flag + status_pwrrank + any_multiethnic,
                                       data = mean_h3)

screenreg(list(full_model_h3, full_model_h3_max, full_model_h3_hhi, full_model_h3_max_hhi, full_model_h3_bin, full_model_h3_max_bin), stars = stars)







